library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ──────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.3.6 ✔ purrr 0.3.4
✔ tibble 3.1.7 ✔ dplyr 1.0.9
✔ tidyr 1.2.0 ✔ stringr 1.4.0
✔ readr 2.1.2 ✔ forcats 0.5.1
── Conflicts ─────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
library(ggthemes)
library(scales)
Attaching package: ‘scales’
The following object is masked from ‘package:purrr’:
discard
The following object is masked from ‘package:readr’:
col_factor
library(readxl)
library(plotly)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
color_a <- c("#58b5e1","#1c5b5a","#46ebdc","#1f4196","#e28de2","#818bd7","#e4ccf1","#82185f","#f849b6","#000000","#5e34bc","#b7d165","#30d52e","#ff5357")
color_na <- c("#1c5b5a","#46ebdc","#1f4196","#e28de2","#818bd7","#e4ccf1","#82185f","#f849b6","#000000","#5e34bc","#b7d165","#30d52e","#ff5357")
counties <- c('Anson', 'Cabarrus', 'Catawba', 'Chester', 'Cleveland', 'Gaston', 'Iredell', 'Lancaster', 'Lincoln', 'Mecklenburg', 'Rowan', 'Stanly', 'Union', 'York')
attainment_lvl <- c('Highest Degree: Less than a High School Diploma', 'Highest Degree: High School Diploma', 'Highest Degree: Some College, No Degree', "Highest Degree: Associate's Degree", "Highest Degree: Bachelor's Degree", "Highest Degree: Graduate or Professional Degree")
foreign_detail <- c('Foreign-Born: Africa', 'Foreign-Born: Asia', 'Foreign-Born: Europe', 'Foreign-Born: Latin America', 'Place of Birth Total')
countypop <- rbind(read_csv("cc-est2019-agesex-37.csv", show_col_types = F),
read_csv("cc-est2019-agesex-45.csv", show_col_types = F)) %>%
select(-SUMLEV, -STATE, -COUNTY) %>%
mutate(CTYNAME = gsub(' County', '', CTYNAME),
YEAR = as.integer(YEAR + 2007)) %>%
filter(CTYNAME %in% counties, YEAR >= 2010,
!(STNAME == 'South Carolina' & CTYNAME == 'Union')) %>%
distinct()
# Year 3 is 2010, Year 12 is 2019
# Making Charlotte Region
cr <- countypop[1:10,] %>%
mutate(CTYNAME = 'Charlotte Region')
for(i in 4:length(colnames(countypop))) {
for(j in 1:10){
cr[j,i] <- sum((countypop %>% filter(YEAR == j+2009))[i])
}
}
# Making Age & Gender data frame
pop_age_gender <- rbind(countypop, cr)
countypop <- cr %>% transmute(YEAR = YEAR, CHARLOTTEPOP = POPESTIMATE) %>% right_join(countypop, by = 'YEAR') %>% mutate(PROPORTION = POPESTIMATE / CHARLOTTEPOP) %>%
group_by(CTYNAME) %>%
mutate(CHANGE = ifelse(YEAR == 2010, 0, POPESTIMATE/lag(POPESTIMATE, default = first(YEAR)) - 1)) %>%
ungroup()
pop_age_gender <- pop_age_gender %>%
select(-contains('_TOT'), -POPEST_FEM, -POPEST_MALE, -AGE16PLUS_MALE, -AGE16PLUS_FEM, -AGE18PLUS_FEM, -AGE18PLUS_MALE, -UNDER5_FEM, -UNDER5_MALE, -AGE1544_FEM, -AGE1544_MALE, -MEDIAN_AGE_FEM, -MEDIAN_AGE_MALE, -AGE65PLUS_FEM,-AGE65PLUS_MALE, -AGE513_FEM, -AGE513_MALE, -AGE4564_FEM, -AGE4564_MALE, -AGE2544_FEM, -AGE2544_MALE, -AGE1824_FEM, -AGE1824_MALE, -AGE1417_FEM, -AGE1417_MALE) %>%
rename(AGE004_FEM = AGE04_FEM, AGE004_MALE = AGE04_MALE, AGE0509_MALE = AGE59_MALE, AGE0509_FEM = AGE59_FEM)
pop_age_gender <- pop_age_gender %>%
pivot_longer(cols = colnames(pop_age_gender[,5:40]), names_to = 'DEMO', values_to = 'POP') %>%
mutate(PERCENTAGE = POP/POPESTIMATE)
pop_age_gender <- pop_age_gender %>%
mutate(GENDER = as.factor(ifelse(grepl('MALE', pop_age_gender$DEMO),'MALE','FEMALE')),
DEMO = gsub('_FEM','', DEMO),
DEMO = gsub('_MALE','', DEMO),
DEMO = case_when(DEMO == 'AGE004' ~ '0-04',
DEMO == 'AGE0509' ~ '05-09',
DEMO == 'AGE1014' ~ '10-14',
DEMO == 'AGE1519' ~ '15-19',
DEMO == 'AGE2024' ~ '20-24',
DEMO == 'AGE2529' ~ '25-29',
DEMO == 'AGE3034' ~ '30-34',
DEMO == 'AGE3539' ~ '35-39',
DEMO == 'AGE4044' ~ '40-44',
DEMO == 'AGE4549' ~ '45-49',
DEMO == 'AGE5054' ~ '50-54',
DEMO == 'AGE5559' ~ '55-59',
DEMO == 'AGE6064' ~ '60-64',
DEMO == 'AGE6569' ~ '65-69',
DEMO == 'AGE7074' ~ '70-74',
DEMO == 'AGE7579' ~ '75-79',
DEMO == 'AGE8084' ~ '80-84',
DEMO == 'AGE85PLUS' ~ '85 and Over'))
# Making ethnicity data frame
ethpop <- rbind(read_csv("cc-est2019-alldata-37.csv", show_col_types = F),
read_csv("cc-est2019-alldata-45.csv", show_col_types = F)) %>%
mutate(CTYNAME = gsub(' County', '', CTYNAME),
YEAR = as.integer(YEAR + 2007),
WHITE = NHWA_MALE + NHWA_FEMALE,
BLACK = NHBA_MALE + NHBA_FEMALE,
HISPANIC = HWA_MALE + HWA_FEMALE + HBA_MALE + HBA_FEMALE + HIA_MALE + HIA_FEMALE + HAA_MALE + HAA_FEMALE + HNA_MALE + HNA_FEMALE + HIA_MALE + HIA_FEMALE,
ASIAN = NHAA_MALE + NHAA_FEMALE,
ISLANDER = NHNA_MALE + NHNA_FEMALE,
NATIVE = NHIA_MALE + NHIA_FEMALE,
MULTIRACIAL = TOM_MALE + TOM_FEMALE - HTOM_MALE - HTOM_FEMALE
) %>%
filter(CTYNAME %in% counties, YEAR >= 3, AGEGRP == 0,
!(STNAME == 'South Carolina' & CTYNAME == 'Union')) %>%
select(STNAME, CTYNAME, YEAR, TOT_POP, WHITE, BLACK, HISPANIC, ASIAN, ISLANDER, NATIVE, MULTIRACIAL) %>%
distinct()
ethpop <- ethpop %>%
pivot_longer(cols = colnames(ethpop[,5:11]), names_to = 'ETHNICITY', values_to = 'POP')
# Making place of birth data frame
birthplace <- read.csv('Values.csv') %>%
mutate(County = gsub(' County, North Carolina', '', County),
County = gsub(' County, South Carolina', '', County)) %>%
filter(Indicator == 'Place of Birth',
County %in% counties,
!(Measure %in% foreign_detail)) %>%
distinct()
birthplace <- birthplace %>% inner_join((birthplace %>% group_by(County, Year) %>% summarise(Total = sum(Numerator_value))), by = c('County', 'Year'))
`summarise()` has grouped output by 'County'. You can
override using the `.groups` argument.
# Making the unemployment data frame
unemployment <- rbind(read_excel('ur_anson.xlsx', trim_ws = T) %>% mutate(County = 'Anson', Period = gsub('M', '', Period)),
read_excel('ur_cabarrus.xlsx', trim_ws = T) %>% mutate(County = 'Cabarrus', Period = gsub('M', '', Period)),
read_excel('ur_catawba.xlsx', trim_ws = T) %>% mutate(County = 'Catawba', Period = gsub('M', '', Period)),
read_excel('ur_chester.xlsx', trim_ws=T, skip=11)[1:266,] %>% rename(Value = 'Observation Value') %>% mutate(County = 'Chester', Period = gsub('M','',Period)) %>% select(-Label),
read_excel('ur_cleveland.xlsx', trim_ws = T) %>% mutate(County = 'Cleveland', Period = gsub('M', '', Period)),
read_excel('ur_gaston.xlsx', trim_ws = T) %>% mutate(County = 'Gaston', Period = gsub('M', '', Period)),
read_excel('ur_iredell.xlsx', trim_ws = T) %>% mutate(County = 'Iredell', Period = gsub('M', '', Period)),
read_excel('ur_lancaster.xlsx', trim_ws = T) %>% mutate(County = 'Lancaster', Period = gsub('M', '', Period)),
read_excel('ur_lincoln.xlsx', trim_ws = T) %>% mutate(County = 'Lincoln', Period = gsub('M', '', Period)),
read_excel('ur_mecklenburg.xlsx', trim_ws = T) %>% mutate(County = 'Mecklenburg', Period = gsub('M', '', Period)),
read_excel('ur_rowan.xlsx', trim_ws = T) %>% mutate(County = 'Rowan', Period = gsub('M', '', Period)),
read_excel('ur_stanly.xlsx', trim_ws=T, skip=11)[1:266,] %>% rename(Value = 'Observation Value') %>% mutate(County = 'Stanly', Period = gsub('M','',Period)) %>% select(-Label),
read_excel('ur_union.xlsx', trim_ws = T) %>% mutate(County = 'Union', Period = gsub('M', '', Period)),
read_excel('ur_york.xlsx', trim_ws = T) %>% mutate(County = 'York', Period = gsub('M', '', Period))) %>%
mutate(Year = as.integer(Year),
Period = as.integer(Period),
Date = as.Date(paste(Year,'-',Period, '-01', sep = '')),
Value = Value/100) %>%
rename(Month = Period,
Unemployment = Value)
# Make income data frame
income <- read.csv('Values.csv') %>%
mutate(County = gsub(' County, North Carolina', '', County),
County = gsub(' County, South Carolina', '', County)) %>%
filter(Indicator == 'Income & Earnings',
County %in% counties,
Measure != 'Household Income: Total') %>%
distinct()
income <- income %>% inner_join((income %>% group_by(County, Year) %>% summarise(Total = sum(Numerator_value))), by = c('County', 'Year'))
`summarise()` has grouped output by 'County'. You can
override using the `.groups` argument.
# Make education attainment data frame
education <- read.csv('Values.csv') %>%
mutate(County = gsub(' County, North Carolina', '', County),
County = gsub(' County, South Carolina', '', County)) %>%
filter(Indicator == 'Educational Attainment',
County %in% counties,
Measure %in% attainment_lvl) %>%
distinct()
education <- education %>% inner_join((education %>% group_by(County, Year) %>% summarise(Total = sum(Numerator_value))), by = c('County', 'Year')) %>%
mutate(Order = as.factor(case_when(
Measure == 'Highest Degree: Less than a High School Diploma' ~ 1,
Measure == 'Highest Degree: High School Diploma' ~ 2,
Measure == 'Highest Degree: Some College, No Degree' ~ 3,
Measure == "Highest Degree: Associate's Degree" ~ 4,
Measure == "Highest Degree: Bachelor's Degree" ~ 5,
Measure == "Highest Degree: Graduate or Professional Degree" ~ 6)))
`summarise()` has grouped output by 'County'. You can
override using the `.groups` argument.
# Make health care coverage data frame
coverage <- read.csv('Values.csv') %>%
mutate(County = gsub(' County, North Carolina', '', County),
County = gsub(' County, South Carolina', '', County)) %>%
filter(Indicator == 'Health Care Coverage',
County %in% counties)
# Make housing age data frame
housing <- read.csv('Values.csv') %>%
mutate(County = gsub(' County, North Carolina', '', County),
County = gsub(' County, South Carolina', '', County)) %>%
filter(Indicator == 'Housing Stock',
County %in% counties)
# Make poverty figures data frame
poverty <- read.csv('Values.csv') %>%
mutate(County = gsub(' County, North Carolina', '', County),
County = gsub(' County, South Carolina', '', County)) %>%
filter(Measure == 'Individuals in Poverty',
Theme == 'Social Well-Being',
County %in% counties)
# Make transportation means data frame
transportation <- read.csv('Values.csv') %>%
mutate(County = gsub(' County, North Carolina', '', County),
County = gsub(' County, South Carolina', '', County)) %>%
filter(Theme == 'Transportation',
Measure != 'Commuting Means Total',
County %in% counties)
plot_ly(countypop %>% filter(YEAR == 2019), x = ~POPESTIMATE, y = ~CTYNAME, type = 'bar', color = ~CTYNAME, colors = color_a, orientation = 'h')
plot_ly(countypop, x=~YEAR, y=~CHANGE, color=~CTYNAME, type='scatter', mode='lines', colors=color_a)
plot_ly(pop_age_gender %>% filter(YEAR == 2017, CTYNAME == 'Charlotte Region', GENDER == "MALE"),
y=~DEMO, x=~PERCENTAGE,
type='bar', name = 'Male') %>%
add_trace(data = pop_age_gender %>% filter(YEAR == 2017, CTYNAME == 'Charlotte Region', GENDER == "FEMALE"), y=~DEMO, name = 'Female')
plot_ly(ethpop %>% filter(YEAR == 2019),
y=~CTYNAME, x=~POP/TOT_POP, color=~ETHNICITY,
type='bar') %>%
layout(barmode = 'stack')
plot_ly(birthplace %>% filter(Year == 2019),
y=~County, x=~Numerator_value/Total, color=~Measure,
type='bar') %>%
layout(barmode = 'stack')
plot_ly(unemployment, x=~Date, y=~Unemployment, color=~County, colors=color_a, type='scatter', mode='lines')
plot_ly(unemployment %>% filter(Year==2015, Month==6), x=~Unemployment, y=~County, color=~County, colors=color_a, type='bar')
#### DFs from Values.csv are missing Anson, Chester, and Stanly Counties
ggplot(income %>% filter(Year == 2014), aes(x = County, y = (Numerator_value / Total), fill = Measure), position = 'fill') +
geom_col() +
scale_y_continuous(labels = scales::percent) +
coord_flip()
ggplot(arrange(education, Order) %>% filter(Year == 2014), aes(x = County, y = (Numerator_value / Total), fill = Order), position = 'fill') +
geom_col() +
scale_y_continuous(labels = scales::percent) +
coord_flip()+
scale_fill_discrete(labels = attainment_lvl, name = '')
coverage %>% filter(Measure == "Health Insurance Total", Year == 2017) %>%
ggplot(., aes(x = County, y = Numerator_value, fill = County))+
geom_col() +
scale_y_continuous(labels = comma) +
coord_flip()
coverage %>% filter(Year == 2017, !(Measure %in% c("Health Insurance Total", "People with Health Insurance"))) %>%
ggplot(aes(x = County, y = Numerator_value, fill = Measure, position = Measure)) +
geom_col(position = "dodge") +
scale_y_continuous(labels = comma) +
coord_flip()
housing %>% filter(Year == 2017) %>%
ggplot(aes(x= County, y = Year-Numerator_value, fill = County)) +
geom_col() +
coord_flip()
##Commuting Modes
# Way too many missing values to create a substantive visualization.
transportation %>% filter(Year == 2014) %>%
ggplot(aes(x = County, y = Numerator_value, fill = Measure, position = Measure)) +
geom_col(position = 'dodge') +
scale_y_continuous(labels = comma) +
coord_flip()
Social Well-Being
Poverty